---
title: "R Notebook: BMJ analysis"
output: html_notebook
---




```{r message=FALSE, include=FALSE}
library(readtext)
library(quanteda)
library(tidyverse)
library(quanteda.textplots)
library(quanteda.textstats)
library(quanteda.textmodels)
```



```{r include=FALSE}
# Root folder of the project data; the reports are read from its REPORTS/
# subfolder.
DATA_DIR <- "~/Dropbox/Research/BMJ Paper/"

# Read every file matched by REPORTS/* and derive three document variables
# (Org, Event, Year) from each file name.
reports <- readtext(
  paste0(DATA_DIR, "REPORTS/*"),
  docvarsfrom = "filenames",
  docvarnames = c("Org", "Event", "Year")
)

# Drop the ".pdf" extension from the document ids. The dot is escaped and the
# pattern anchored to the end of the id: an unescaped ".pdf" is a regex that
# matches any character followed by "pdf", wherever it first occurs.
reports$doc_id <- str_replace(reports$doc_id, "\\.pdf$", "")
```



Text cleaning and transformation
```{r}

# Normalise the raw report text with textclean, one pass per call, in this
# order: non-ASCII characters, symbols, whitespace, contractions, HTML,
# kerning, money.
# NOTE(review): replace_symbol() runs before replace_html() and
# replace_money(); if it rewrites "&" and "$" first, those later passes may
# have nothing left to match -- confirm the intended order.
reports$text <- textclean::replace_non_ascii(reports$text)
reports$text <- textclean::replace_symbol(reports$text)
reports$text <- textclean::replace_white(reports$text)
reports$text <- textclean::replace_contraction(reports$text)
reports$text <- textclean::replace_html(reports$text)
reports$text <- textclean::replace_kern(reports$text)
reports$text <- textclean::replace_money(reports$text)

# Strip DOI/URL debris ("doi", "https", "org"). Only whole words are removed:
# a fixed-substring replacement also deletes "org" from inside ordinary words
# (e.g. "organic" becomes "anic"), which corrupts the text analysed later.
reports$text <- gsub("\\b(doi|https|org)\\b", "", reports$text, perl = TRUE)
```




```{r include=FALSE}
# Build the quanteda corpus from the cleaned readtext data frame, taking the
# document text from its "text" column.
reports_corpus <- reports %>%
  corpus(text_field = "text")
```



```{r eval=FALSE, include=FALSE}

# Summarise the corpus as a data frame and write it to CSV.
summary <- reports_corpus %>%
  summary() %>%
  as.data.frame()

summary %>% write_csv("corpus_summary.csv")

# Disabled code kept from an earlier analysis (refers to `ndc_corpus`, which is
# not created in this notebook):
#corp_summary <- summarise(group_by(summary(ndc_corpus, n = 32),Country), total_sentences=sum(Sentences),total_words=sum(Tokens))

#readr::write_csv(corp_summary, "corp_summary.csv")

```



# Minimal pre-processing and tokenisation
```{r include=FALSE}
# Tokenise the corpus into lower-cased word tokens (punctuation, symbols,
# numbers and URLs dropped, hyphenated words split), then remove unwanted
# tokens in three passes.
tokens <- reports_corpus %>%
  tokens(
    what = "word",
    remove_punct = TRUE,
    remove_symbols = TRUE,
    remove_numbers = TRUE,
    remove_url = TRUE,
    split_hyphens = TRUE,
    verbose = TRUE
  ) %>%
  tokens_tolower() %>%
  # Pass 1: English stopwords.
  tokens_remove(stopwords("english"), padding = FALSE, verbose = TRUE) %>%
  # Pass 2: citation fragments.
  tokens_remove(c("doi", "https"), padding = FALSE, verbose = TRUE) %>%
  # Pass 3: regex patterns for digits/hyphens, punctuation and two-character
  # tokens, combined with min_nchar = 2L.
  tokens_remove(
    c("[\\d-]", "[[:punct:]]", "^.{2}$"),
    valuetype = "regex",
    min_nchar = 2L,
    verbose = TRUE
  )

  

```




```{r}
# Build unigrams and bigrams (n = 1:2, no skips); multi-word n-grams are
# joined with "_".
tokens_ngram <- tokens %>%
  tokens_ngrams(n = 1:2, skip = 0, concatenator = "_")
```



```{r include=FALSE}
# Document-feature matrix built from the n-gram tokens.
reports_dfm <- dfm(tokens_ngram)
```

# All reports: "health"

```{r}
# Keyword-in-context hits for the fixed pattern "health" across all reports,
# with a window of 25 tokens; the hits are also written to CSV.
kwic_health <- tokens_ngram %>%
  kwic(pattern = "health", window = 25, valuetype = "fixed")

readr::write_csv(kwic_health, "kwic_health.csv")

```


```{r}
# Turn the KWIC hits back into a corpus (context not split, keyword
# extracted), then tokenise it and build its document-feature matrix.
corpus_health <- kwic_health %>%
  corpus(split_context = FALSE, extract_keyword = TRUE)

health_dfm <- dfm(tokens(corpus_health))

```



```{r}

# Word cloud of health_dfm (at most 50 words, minimum count 3), written to
# all_reports_50.pdf.
pdf("all_reports_50.pdf", width = 7, height = 7)

# tryCatch(..., finally =) closes the PDF device even if plotting fails, so a
# failed run does not leave the device open to capture later plots.
tryCatch(
  textplot_wordcloud(
    health_dfm,
    random_order = FALSE,
    min_size = 0.5, max_size = 5, min_count = 3, max_words = 50,
    color = "red"
  ),
  finally = dev.off()
)
```


```{r}
# network graph
# Feature co-occurrence matrix over the "health" contexts, counting
# frequencies within a 5-token window.
fcm <- corpus_health %>%
  tokens() %>%
  fcm(context = "window", count = "frequency", window = 5L,
      span_sentence = FALSE, tri = FALSE)

#fcm_dict <- fcm_select(fcm, pattern = combined_dict, selection = "keep", valuetype = "regex")

# Names of the 40 top features of the co-occurrence matrix.
feat <- fcm %>%
  topfeatures(40) %>%
  names()
```

```{r}
# Fixed seed, set before the network plot is built.
set.seed(1234)

# Network of the top features. The plot object is kept and passed to ggsave()
# explicitly: ggsave() otherwise saves whatever last_plot() returns, which is
# only this network if it was the last plot printed.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.3)
print(network_plot)

ggsave("network_cooccurrence.pdf", plot = network_plot)
```

```{r}
# List the document names of the dfm through the public accessor instead of
# reading the object's internal @docvars slot.
docnames(reports_dfm)
```




```{r}
# Dictionary with a single key, "health", listing the health-related terms
# searched for in each report. Terms containing "_" correspond to the bigrams
# produced with concatenator "_". Each term is listed once ("nutrition" was
# previously duplicated).
health_dict <- dictionary(list(
  health = c(
    "malaria", "diarrhoea", "infection", "disease", "diseases", "sars",
    "measles", "pneumonia", "epidemic", "epidemics", "pandemic", "pandemics",
    "epidemiology", "healthcare", "health", "mortality", "morbidity",
    "nutrition", "illness", "illnesses", "ncd", "ncds", "air_pollution",
    "malnutrition", "malnourishment", "mental_disorder", "mental_disorders",
    "stunting"
  )
))
```




# CBD_ConventionBiodiversitycop2014_2019

```{r}

# Health-term analysis for the report CBD_ConventionBiodiversitycop2014_2019.
tokens_CBD_ConventionBiodiversitycop2014_2019 <- tokens_subset(tokens_ngram, docname_ == "CBD_ConventionBiodiversitycop2014_2019")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_CBD_ConventionBiodiversitycop2014_2019 <- kwic(tokens_CBD_ConventionBiodiversitycop2014_2019, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_CBD_ConventionBiodiversitycop2014_2019, "kwic_CBD_ConventionBiodiversitycop2014_2019_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_CBD_ConventionBiodiversitycop2014_2019 <- corpus(kwic_CBD_ConventionBiodiversitycop2014_2019, split_context = FALSE, extract_keyword = TRUE)

dfm_CBD_ConventionBiodiversitycop2014_2019 <- corpus_CBD_ConventionBiodiversitycop2014_2019 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_CBD_ConventionBiodiversitycop2014_2019),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_CBD_ConventionBiodiversitycop2014_2019.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_CBD_ConventionBiodiversitycop2014_2019.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_CBD_ConventionBiodiversitycop2014_2019, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```




```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_CBD_ConventionBiodiversitycop2014_2019, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_CBD_ConventionBiodiversitycop2014_2019.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "CBD_ConventionBiodiversitycop2014_2019"

features %>% write_csv("features_one.csv")

```






# CBD_GlobalBiodiversityOutlook5_2020
```{r}

# Health-term analysis for the report CBD_GlobalBiodiversityOutlook5_2020.
tokens_CBD_GlobalBiodiversityOutlook5_2020 <- tokens_subset(tokens_ngram, docname_ == "CBD_GlobalBiodiversityOutlook5_2020")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_CBD_GlobalBiodiversityOutlook5_2020 <- kwic(tokens_CBD_GlobalBiodiversityOutlook5_2020, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_CBD_GlobalBiodiversityOutlook5_2020, "kwic_CBD_GlobalBiodiversityOutlook5_2020_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_CBD_GlobalBiodiversityOutlook5_2020 <- corpus(kwic_CBD_GlobalBiodiversityOutlook5_2020, split_context = FALSE, extract_keyword = TRUE)

dfm_CBD_GlobalBiodiversityOutlook5_2020 <- corpus_CBD_GlobalBiodiversityOutlook5_2020 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_CBD_GlobalBiodiversityOutlook5_2020),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_CBD_GlobalBiodiversityOutlook5_2020.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_CBD_GlobalBiodiversityOutlook5_2020.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_CBD_GlobalBiodiversityOutlook5_2020, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```

```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_CBD_GlobalBiodiversityOutlook5_2020, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_CBD_GlobalBiodiversityOutlook5_2020.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "CBD_GlobalBiodiversityOutlook5_2020"

features %>% write_csv("features_two.csv")

```


# FAO_AgriFoodSys_2021

```{r}

# Health-term analysis for the report FAO_AgriFoodSys_2021.
tokens_FAO_AgriFoodSys_2021 <- tokens_subset(tokens_ngram, docname_ == "FAO_AgriFoodSys_2021")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_FAO_AgriFoodSys_2021 <- kwic(tokens_FAO_AgriFoodSys_2021, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_FAO_AgriFoodSys_2021, "kwic_FAO_AgriFoodSys_2021_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_FAO_AgriFoodSys_2021 <- corpus(kwic_FAO_AgriFoodSys_2021, split_context = FALSE, extract_keyword = TRUE)

dfm_FAO_AgriFoodSys_2021 <- corpus_FAO_AgriFoodSys_2021 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_FAO_AgriFoodSys_2021),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_FAO_AgriFoodSys_2021.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_FAO_AgriFoodSys_2021.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_FAO_AgriFoodSys_2021, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```



```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_FAO_AgriFoodSys_2021, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_FAO_AgriFoodSys_2021.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "FAO_AgriFoodSys_2021"

features %>% write_csv("features_three.csv")

```




# GCF_GovClimateFuture_2021

```{r}

# Health-term analysis for the report GCF_GovClimateFuture_2021.
tokens_GCF_GovClimateFuture_2021 <- tokens_subset(tokens_ngram, docname_ == "GCF_GovClimateFuture_2021")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_GCF_GovClimateFuture_2021 <- kwic(tokens_GCF_GovClimateFuture_2021, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_GCF_GovClimateFuture_2021, "kwic_GCF_GovClimateFuture_2021_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_GCF_GovClimateFuture_2021 <- corpus(kwic_GCF_GovClimateFuture_2021, split_context = FALSE, extract_keyword = TRUE)

dfm_GCF_GovClimateFuture_2021 <- corpus_GCF_GovClimateFuture_2021 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_GCF_GovClimateFuture_2021),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_GCF_GovClimateFuture_2021.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_GCF_GovClimateFuture_2021.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_GCF_GovClimateFuture_2021, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```



```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_GCF_GovClimateFuture_2021, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_GCF_GovClimateFuture_2021.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "GCF_GovClimateFuture_2021"

features %>% write_csv("features_four.csv")

```



# IEA_WorldEnergyOutlook_2021


```{r}

# Health-term analysis for the report IEA_WorldEnergyOutlook_2021.
tokens_IEA_WorldEnergyOutlook_2021 <- tokens_subset(tokens_ngram, docname_ == "IEA_WorldEnergyOutlook_2021")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_IEA_WorldEnergyOutlook_2021 <- kwic(tokens_IEA_WorldEnergyOutlook_2021, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_IEA_WorldEnergyOutlook_2021, "kwic_IEA_WorldEnergyOutlook_2021_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_IEA_WorldEnergyOutlook_2021 <- corpus(kwic_IEA_WorldEnergyOutlook_2021, split_context = FALSE, extract_keyword = TRUE)

dfm_IEA_WorldEnergyOutlook_2021 <- corpus_IEA_WorldEnergyOutlook_2021 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_IEA_WorldEnergyOutlook_2021),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_IEA_WorldEnergyOutlook_2021.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_IEA_WorldEnergyOutlook_2021.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_IEA_WorldEnergyOutlook_2021, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```




```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_IEA_WorldEnergyOutlook_2021, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_IEA_WorldEnergyOutlook_2021.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "IEA_WorldEnergyOutlook_2021"

features %>% write_csv("features_five.csv")

```





# IPBES_BiodiversityCCWorkshop_2021


```{r}

# Health-term analysis for the report IPBES_BiodiversityCCWorkshop_2021.
tokens_IPBES_BiodiversityCCWorkshop_2021 <- tokens_subset(tokens_ngram, docname_ == "IPBES_BiodiversityCCWorkshop_2021")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_IPBES_BiodiversityCCWorkshop_2021 <- kwic(tokens_IPBES_BiodiversityCCWorkshop_2021, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_IPBES_BiodiversityCCWorkshop_2021, "kwic_IPBES_BiodiversityCCWorkshop_2021_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_IPBES_BiodiversityCCWorkshop_2021 <- corpus(kwic_IPBES_BiodiversityCCWorkshop_2021, split_context = FALSE, extract_keyword = TRUE)

dfm_IPBES_BiodiversityCCWorkshop_2021 <- corpus_IPBES_BiodiversityCCWorkshop_2021 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_IPBES_BiodiversityCCWorkshop_2021),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_IPBES_BiodiversityCCWorkshop_2021.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_IPBES_BiodiversityCCWorkshop_2021.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_IPBES_BiodiversityCCWorkshop_2021, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```




```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_IPBES_BiodiversityCCWorkshop_2021, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_IPBES_BiodiversityCCWorkshop_2021.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "IPBES_BiodiversityCCWorkshop_2021"

features %>% write_csv("features_six.csv")


```



# IPBES_GRBiodiversityEcosys_2019


```{r}

# Health-term analysis for the report IPBES_GRBiodiversityEcosys_2019.
tokens_IPBES_GRBiodiversityEcosys_2019 <- tokens_subset(tokens_ngram, docname_ == "IPBES_GRBiodiversityEcosys_2019")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_IPBES_GRBiodiversityEcosys_2019 <- kwic(tokens_IPBES_GRBiodiversityEcosys_2019, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_IPBES_GRBiodiversityEcosys_2019, "kwic_IPBES_GRBiodiversityEcosys_2019_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_IPBES_GRBiodiversityEcosys_2019 <- corpus(kwic_IPBES_GRBiodiversityEcosys_2019, split_context = FALSE, extract_keyword = TRUE)

dfm_IPBES_GRBiodiversityEcosys_2019 <- corpus_IPBES_GRBiodiversityEcosys_2019 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_IPBES_GRBiodiversityEcosys_2019),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_IPBES_GRBiodiversityEcosys_2019.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_IPBES_GRBiodiversityEcosys_2019.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_IPBES_GRBiodiversityEcosys_2019, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```




```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_IPBES_GRBiodiversityEcosys_2019, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_IPBES_GRBiodiversityEcosys_2019.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "IPBES_GRBiodiversityEcosys_2019"

features %>% write_csv("features_seven.csv")

```


# IPCC_AR6WGI_2021


```{r}

# Health-term analysis for the report IPCC_AR6WGI_2021.
tokens_IPCC_AR6WGI_2021 <- tokens_subset(tokens_ngram, docname_ == "IPCC_AR6WGI_2021")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_IPCC_AR6WGI_2021 <- kwic(tokens_IPCC_AR6WGI_2021, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_IPCC_AR6WGI_2021, "kwic_IPCC_AR6WGI_2021_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_IPCC_AR6WGI_2021 <- corpus(kwic_IPCC_AR6WGI_2021, split_context = FALSE, extract_keyword = TRUE)

dfm_IPCC_AR6WGI_2021 <- corpus_IPCC_AR6WGI_2021 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_IPCC_AR6WGI_2021),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_IPCC_AR6WGI_2021.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_IPCC_AR6WGI_2021.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_IPCC_AR6WGI_2021, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```





```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_IPCC_AR6WGI_2021, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_IPCC_AR6WGI_2021.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "IPCC_AR6WGI_2021"

features %>% write_csv("features_eight.csv")

```





# IPCC_SR15_2018


```{r}

# Health-term analysis for the report IPCC_SR15_2018.
tokens_IPCC_SR15_2018 <- tokens_subset(tokens_ngram, docname_ == "IPCC_SR15_2018")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_IPCC_SR15_2018 <- kwic(tokens_IPCC_SR15_2018, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_IPCC_SR15_2018, "kwic_IPCC_SR15_2018_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_IPCC_SR15_2018 <- corpus(kwic_IPCC_SR15_2018, split_context = FALSE, extract_keyword = TRUE)

dfm_IPCC_SR15_2018 <- corpus_IPCC_SR15_2018 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_IPCC_SR15_2018),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_IPCC_SR15_2018.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_IPCC_SR15_2018.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_IPCC_SR15_2018, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```




```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_IPCC_SR15_2018, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_IPCC_SR15_2018.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "IPCC_SR15_2018"

features %>% write_csv("features_nine.csv")

```




# IPCC_SRCCL_2019


```{r}

# Health-term analysis for the report IPCC_SRCCL_2019.
tokens_IPCC_SRCCL_2019 <- tokens_subset(tokens_ngram, docname_ == "IPCC_SRCCL_2019")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_IPCC_SRCCL_2019 <- kwic(tokens_IPCC_SRCCL_2019, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_IPCC_SRCCL_2019, "kwic_IPCC_SRCCL_2019_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_IPCC_SRCCL_2019 <- corpus(kwic_IPCC_SRCCL_2019, split_context = FALSE, extract_keyword = TRUE)

dfm_IPCC_SRCCL_2019 <- corpus_IPCC_SRCCL_2019 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_IPCC_SRCCL_2019),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_IPCC_SRCCL_2019.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_IPCC_SRCCL_2019.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_IPCC_SRCCL_2019, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```



```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_IPCC_SRCCL_2019, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_IPCC_SRCCL_2019.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "IPCC_SRCCL_2019"

features %>% write_csv("features_ten.csv")

```



# IPCC_SROCC_2019

```{r}

# Health-term analysis for the report IPCC_SROCC_2019.
tokens_IPCC_SROCC_2019 <- tokens_subset(tokens_ngram, docname_ == "IPCC_SROCC_2019")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_IPCC_SROCC_2019 <- kwic(tokens_IPCC_SROCC_2019, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_IPCC_SROCC_2019, "kwic_IPCC_SROCC_2019_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_IPCC_SROCC_2019 <- corpus(kwic_IPCC_SROCC_2019, split_context = FALSE, extract_keyword = TRUE)

dfm_IPCC_SROCC_2019 <- corpus_IPCC_SROCC_2019 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_IPCC_SROCC_2019),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_IPCC_SROCC_2019.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_IPCC_SROCC_2019.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_IPCC_SROCC_2019, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```



```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_IPCC_SROCC_2019, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_IPCC_SROCC_2019.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "IPCC_SROCC_2019"

features %>% write_csv("features_eleven.csv")

```



# MIT_FWGlobalOutlook_2018

```{r}

# Health-term analysis for the report MIT_FWGlobalOutlook_2018.
tokens_MIT_FWGlobalOutlook_2018 <- tokens_subset(tokens_ngram, docname_ == "MIT_FWGlobalOutlook_2018")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_MIT_FWGlobalOutlook_2018 <- kwic(tokens_MIT_FWGlobalOutlook_2018, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_MIT_FWGlobalOutlook_2018, "kwic_MIT_FWGlobalOutlook_2018_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_MIT_FWGlobalOutlook_2018 <- corpus(kwic_MIT_FWGlobalOutlook_2018, split_context = FALSE, extract_keyword = TRUE)

dfm_MIT_FWGlobalOutlook_2018 <- corpus_MIT_FWGlobalOutlook_2018 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_MIT_FWGlobalOutlook_2018),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_MIT_FWGlobalOutlook_2018.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_MIT_FWGlobalOutlook_2018.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_MIT_FWGlobalOutlook_2018, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```



```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_MIT_FWGlobalOutlook_2018, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_MIT_FWGlobalOutlook_2018.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "MIT_FWGlobalOutlook_2018"

features %>% write_csv("features_twelve.csv")
```



# SEI_PGR2021_2021

```{r}

# Health-term analysis for the report SEI_PGR2021_2021.
tokens_SEI_PGR2021_2021 <- tokens_subset(tokens_ngram, docname_ == "SEI_PGR2021_2021")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_SEI_PGR2021_2021 <- kwic(tokens_SEI_PGR2021_2021, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_SEI_PGR2021_2021, "kwic_SEI_PGR2021_2021_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_SEI_PGR2021_2021 <- corpus(kwic_SEI_PGR2021_2021, split_context = FALSE, extract_keyword = TRUE)

dfm_SEI_PGR2021_2021 <- corpus_SEI_PGR2021_2021 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_SEI_PGR2021_2021),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_SEI_PGR2021_2021.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_SEI_PGR2021_2021.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_SEI_PGR2021_2021, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```



```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_SEI_PGR2021_2021, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_SEI_PGR2021_2021.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "SEI_PGR2021_2021"

features %>% write_csv("features_thirteen.csv")

```




# UN_SRDroughts_2021

```{r}

# Health-term analysis for the report UN_SRDroughts_2021.
tokens_UN_SRDroughts_2021 <- tokens_subset(tokens_ngram, docname_ == "UN_SRDroughts_2021")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_UN_SRDroughts_2021 <- kwic(tokens_UN_SRDroughts_2021, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_UN_SRDroughts_2021, "kwic_UN_SRDroughts_2021_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_UN_SRDroughts_2021 <- corpus(kwic_UN_SRDroughts_2021, split_context = FALSE, extract_keyword = TRUE)

dfm_UN_SRDroughts_2021 <- corpus_UN_SRDroughts_2021 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_UN_SRDroughts_2021),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_UN_SRDroughts_2021.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_UN_SRDroughts_2021.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_UN_SRDroughts_2021, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```




```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_UN_SRDroughts_2021, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_UN_SRDroughts_2021.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "UN_SRDroughts_2021"

features %>% write_csv("features_fourteen.csv")

```




# UN_UNFSS2021_2021

```{r}

# Health-term analysis for the report UN_UNFSS2021_2021.
tokens_UN_UNFSS2021_2021 <- tokens_subset(tokens_ngram, docname_ == "UN_UNFSS2021_2021")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_UN_UNFSS2021_2021 <- kwic(tokens_UN_UNFSS2021_2021, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_UN_UNFSS2021_2021, "kwic_UN_UNFSS2021_2021_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_UN_UNFSS2021_2021 <- corpus(kwic_UN_UNFSS2021_2021, split_context = FALSE, extract_keyword = TRUE)

dfm_UN_UNFSS2021_2021 <- corpus_UN_UNFSS2021_2021 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_UN_UNFSS2021_2021),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_UN_UNFSS2021_2021.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_UN_UNFSS2021_2021.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_UN_UNFSS2021_2021, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```



```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_UN_UNFSS2021_2021, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_UN_UNFSS2021_2021.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "UN_UNFSS2021_2021"

features %>% write_csv("features_fifteen.csv")

```

                      



# UNDP_NDC_2019

```{r}

# Health-term analysis for the report UNDP_NDC_2019.
tokens_UNDP_NDC_2019 <- tokens_subset(tokens_ngram, docname_ == "UNDP_NDC_2019")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_UNDP_NDC_2019 <- kwic(tokens_UNDP_NDC_2019, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_UNDP_NDC_2019, "kwic_UNDP_NDC_2019_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_UNDP_NDC_2019 <- corpus(kwic_UNDP_NDC_2019, split_context = FALSE, extract_keyword = TRUE)

dfm_UNDP_NDC_2019 <- corpus_UNDP_NDC_2019 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_UNDP_NDC_2019),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_UNDP_NDC_2019.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_UNDP_NDC_2019.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_UNDP_NDC_2019, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```




```{r}
# 50 most frequent features in the health contexts of this report.
features <- textstat_frequency(dfm_UNDP_NDC_2019, n = 50)

# Sort by reverse frequency order
features$feature <- with(features, reorder(feature, -frequency))

# Bind the plot so ggsave() receives it explicitly instead of via last_plot().
frequency_plot <- ggplot(features, aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(frequency_plot)

ggsave("frequency_UNDP_NDC_2019.pdf", plot = frequency_plot)

# Tag the rows with the source report before exporting.
features$docname <- "UNDP_NDC_2019"

features %>% write_csv("features_sixteen.csv")

```



# UNEP_AGR2020_2020


```{r}

# Health-term analysis for the report UNEP_AGR2020_2020.
tokens_UNEP_AGR2020_2020 <- tokens_subset(tokens_ngram, docname_ == "UNEP_AGR2020_2020")

# Keyword-in-context hits for the health_dict terms (window of 25 tokens).
kwic_UNEP_AGR2020_2020 <- kwic(tokens_UNEP_AGR2020_2020, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(kwic_UNEP_AGR2020_2020, "kwic_UNEP_AGR2020_2020_health.csv")

# Rebuild a corpus from the hits and derive its document-feature matrix.
corpus_UNEP_AGR2020_2020 <- corpus(kwic_UNEP_AGR2020_2020, split_context = FALSE, extract_keyword = TRUE)

dfm_UNEP_AGR2020_2020 <- corpus_UNEP_AGR2020_2020 %>% tokens() %>% dfm()

# network graph: co-occurrence counts within a 5-token window, top 40 features
fcm <- fcm(tokens(corpus_UNEP_AGR2020_2020),
           context = "window", count = "frequency", window = 5L, span_sentence = FALSE, tri = FALSE)

feat <- names(topfeatures(fcm, 40))

set.seed(1234)

# Keep the plot object and pass it to ggsave() explicitly rather than relying
# on last_plot(), so the saved file is always this network.
network_plot <- fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
print(network_plot)

ggsave("network_UNEP_AGR2020_2020.pdf", plot = network_plot)

# Word cloud written to PDF; finally = dev.off() closes the device even if
# plotting fails. The stray trailing comma after color = "red" is removed.
pdf("dfm_UNEP_AGR2020_2020.pdf", width = 7, height = 7)

tryCatch(
  textplot_wordcloud(dfm_UNEP_AGR2020_2020, random_order = FALSE,
                     min_size = 0.5, max_size = 5, min_count = 3, max_words = 50, color = "red"),
  finally = dev.off()
)

```




```{r}
# 50 most frequent features in the UNEP_AGR2020_2020 health-match dfm.
features <- dfm_UNEP_AGR2020_2020 %>% textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNEP_AGR2020_2020.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNEP_AGR2020_2020"
write_csv(features, "features_seventeen.csv")

```




# UNEP_AWBBB_2021


```{r}

# Tokens of the UNEP_AWBBB_2021 report only.
tokens_UNEP_AWBBB_2021 <- tokens_subset(
  tokens_ngram, docname_ == "UNEP_AWBBB_2021"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNEP_AWBBB_2021 <- kwic(
  tokens_UNEP_AWBBB_2021, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(kwic_UNEP_AWBBB_2021, "kwic_UNEP_AWBBB_2021_health.csv")

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNEP_AWBBB_2021 <- corpus(
  kwic_UNEP_AWBBB_2021,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNEP_AWBBB_2021 <- dfm(tokens(corpus_UNEP_AWBBB_2021))

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNEP_AWBBB_2021),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_UNEP_AWBBB_2021.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_UNEP_AWBBB_2021.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNEP_AWBBB_2021,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```


```{r}
# 50 most frequent features in the UNEP_AWBBB_2021 health-match dfm.
features <- dfm_UNEP_AWBBB_2021 %>% textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNEP_AWBBB_2021.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNEP_AWBBB_2021"
write_csv(features, "features_eighteen.csv")

```



# UNEP_EmissionsGap_2020


```{r}

# Tokens of the UNEP_EmissionsGap_2020 report only.
tokens_UNEP_EmissionsGap_2020 <- tokens_subset(
  tokens_ngram, docname_ == "UNEP_EmissionsGap_2020"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNEP_EmissionsGap_2020 <- kwic(
  tokens_UNEP_EmissionsGap_2020, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_UNEP_EmissionsGap_2020,
  "kwic_UNEP_EmissionsGap_2020_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNEP_EmissionsGap_2020 <- corpus(
  kwic_UNEP_EmissionsGap_2020,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNEP_EmissionsGap_2020 <- dfm(tokens(corpus_UNEP_EmissionsGap_2020))

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNEP_EmissionsGap_2020),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_UNEP_EmissionsGap_2020.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_UNEP_EmissionsGap_2020.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNEP_EmissionsGap_2020,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```



```{r}
# 50 most frequent features in the UNEP_EmissionsGap_2020 health-match dfm.
features <- dfm_UNEP_EmissionsGap_2020 %>% textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNEP_EmissionsGap_2020.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNEP_EmissionsGap_2020"
write_csv(features, "features_nineteen.csv")

```


# UNEP_ERPNC_2021


```{r}

# Tokens of the UNEP_ERPNC_2021 report only.
tokens_UNEP_ERPNC_2021 <- tokens_subset(
  tokens_ngram, docname_ == "UNEP_ERPNC_2021"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNEP_ERPNC_2021 <- kwic(
  tokens_UNEP_ERPNC_2021, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(kwic_UNEP_ERPNC_2021, "kwic_UNEP_ERPNC_2021_health.csv")

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNEP_ERPNC_2021 <- corpus(
  kwic_UNEP_ERPNC_2021,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNEP_ERPNC_2021 <- dfm(tokens(corpus_UNEP_ERPNC_2021))

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNEP_ERPNC_2021),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_UNEP_ERPNC_2021.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_UNEP_ERPNC_2021.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNEP_ERPNC_2021,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```



```{r}
# 50 most frequent features in the UNEP_ERPNC_2021 health-match dfm.
features <- dfm_UNEP_ERPNC_2021 %>% textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNEP_ERPNC_2021.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNEP_ERPNC_2021"
write_csv(features, "features_twenty.csv")

```


# UNEP_MPN_2021


```{r}

# Tokens of the UNEP_MPN_2021 report only.
tokens_UNEP_MPN_2021 <- tokens_subset(
  tokens_ngram, docname_ == "UNEP_MPN_2021"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNEP_MPN_2021 <- kwic(
  tokens_UNEP_MPN_2021, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(kwic_UNEP_MPN_2021, "kwic_UNEP_MPN_2021_health.csv")

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNEP_MPN_2021 <- corpus(
  kwic_UNEP_MPN_2021,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNEP_MPN_2021 <- dfm(tokens(corpus_UNEP_MPN_2021))

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNEP_MPN_2021),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_UNEP_MPN_2021.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_UNEP_MPN_2021.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNEP_MPN_2021,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```


                     
                      


```{r}
# 50 most frequent features in the UNEP_MPN_2021 health-match dfm.
features <- dfm_UNEP_MPN_2021 %>% textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNEP_MPN_2021.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNEP_MPN_2021"
write_csv(features, "features_twenty1.csv")

```      



# UNEP_ProtectedPlanetReport_2018


```{r}

# Tokens of the UNEP_ProtectedPlanetReport_2018 report only.
tokens_UNEP_ProtectedPlanetReport_2018 <- tokens_subset(
  tokens_ngram, docname_ == "UNEP_ProtectedPlanetReport_2018"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNEP_ProtectedPlanetReport_2018 <- kwic(
  tokens_UNEP_ProtectedPlanetReport_2018, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_UNEP_ProtectedPlanetReport_2018,
  "kwic_UNEP_ProtectedPlanetReport_2018_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNEP_ProtectedPlanetReport_2018 <- corpus(
  kwic_UNEP_ProtectedPlanetReport_2018,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNEP_ProtectedPlanetReport_2018 <- dfm(
  tokens(corpus_UNEP_ProtectedPlanetReport_2018)
)

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNEP_ProtectedPlanetReport_2018),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_UNEP_ProtectedPlanetReport_2018.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_UNEP_ProtectedPlanetReport_2018.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNEP_ProtectedPlanetReport_2018,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```




```{r}
# 50 most frequent features in the UNEP_ProtectedPlanetReport_2018
# health-match dfm.
features <- dfm_UNEP_ProtectedPlanetReport_2018 %>%
  textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNEP_ProtectedPlanetReport_2018.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNEP_ProtectedPlanetReport_2018"
write_csv(features, "features_twenty2.csv")

```      



# UNEP_WF20EN_2020


```{r}

# Tokens of the UNEP_WF20EN_2020 report only.
tokens_UNEP_WF20EN_2020 <- tokens_subset(
  tokens_ngram, docname_ == "UNEP_WF20EN_2020"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNEP_WF20EN_2020 <- kwic(
  tokens_UNEP_WF20EN_2020, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(kwic_UNEP_WF20EN_2020, "kwic_UNEP_WF20EN_2020_health.csv")

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNEP_WF20EN_2020 <- corpus(
  kwic_UNEP_WF20EN_2020,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNEP_WF20EN_2020 <- dfm(tokens(corpus_UNEP_WF20EN_2020))

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNEP_WF20EN_2020),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_UNEP_WF20EN_2020.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_UNEP_WF20EN_2020.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNEP_WF20EN_2020,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```




```{r}
# 50 most frequent features in the UNEP_WF20EN_2020 health-match dfm.
features <- dfm_UNEP_WF20EN_2020 %>% textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNEP_WF20EN_2020.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNEP_WF20EN_2020"
write_csv(features, "features_twenty3.csv")

```      



# UNFCC_ClimateActionSupport_2019


```{r}

# Tokens of the UNFCC_ClimateActionSupport_2019 report only.
tokens_UNFCC_ClimateActionSupport_2019 <- tokens_subset(
  tokens_ngram, docname_ == "UNFCC_ClimateActionSupport_2019"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNFCC_ClimateActionSupport_2019 <- kwic(
  tokens_UNFCC_ClimateActionSupport_2019, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_UNFCC_ClimateActionSupport_2019,
  "kwic_UNFCC_ClimateActionSupport_2019_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNFCC_ClimateActionSupport_2019 <- corpus(
  kwic_UNFCC_ClimateActionSupport_2019,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNFCC_ClimateActionSupport_2019 <- dfm(
  tokens(corpus_UNFCC_ClimateActionSupport_2019)
)

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNFCC_ClimateActionSupport_2019),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_UNFCC_ClimateActionSupport_2019.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_UNFCC_ClimateActionSupport_2019.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNFCC_ClimateActionSupport_2019,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```


      


```{r}
# 50 most frequent features in the UNFCC_ClimateActionSupport_2019
# health-match dfm.
features <- dfm_UNFCC_ClimateActionSupport_2019 %>%
  textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNFCC_ClimateActionSupport_2019.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNFCC_ClimateActionSupport_2019"
write_csv(features, "features_twenty4.csv")

```    



# UNFCC_LandWaterFoodCOP26_2021


```{r}

# Tokens of the UNFCC_LandWaterFoodCOP26_2021 report only.
tokens_UNFCC_LandWaterFoodCOP26_2021 <- tokens_subset(
  tokens_ngram, docname_ == "UNFCC_LandWaterFoodCOP26_2021"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNFCC_LandWaterFoodCOP26_2021 <- kwic(
  tokens_UNFCC_LandWaterFoodCOP26_2021, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_UNFCC_LandWaterFoodCOP26_2021,
  "kwic_UNFCC_LandWaterFoodCOP26_2021_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNFCC_LandWaterFoodCOP26_2021 <- corpus(
  kwic_UNFCC_LandWaterFoodCOP26_2021,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNFCC_LandWaterFoodCOP26_2021 <- dfm(
  tokens(corpus_UNFCC_LandWaterFoodCOP26_2021)
)

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNFCC_LandWaterFoodCOP26_2021),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_UNFCC_LandWaterFoodCOP26_2021.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_UNFCC_LandWaterFoodCOP26_2021.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNFCC_LandWaterFoodCOP26_2021,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```




```{r}
# 50 most frequent features in the UNFCC_LandWaterFoodCOP26_2021
# health-match dfm.
features <- dfm_UNFCC_LandWaterFoodCOP26_2021 %>%
  textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNFCC_LandWaterFoodCOP26_2021.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNFCC_LandWaterFoodCOP26_2021"
write_csv(features, "features_twenty5.csv")


```    



# UNFCC_NDCsParis_2021


```{r}

# Tokens of the UNFCC_NDCsParis_2021 report only.
tokens_UNFCC_NDCsParis_2021 <- tokens_subset(
  tokens_ngram, docname_ == "UNFCC_NDCsParis_2021"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNFCC_NDCsParis_2021 <- kwic(
  tokens_UNFCC_NDCsParis_2021, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_UNFCC_NDCsParis_2021,
  "kwic_UNFCC_NDCsParis_2021_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNFCC_NDCsParis_2021 <- corpus(
  kwic_UNFCC_NDCsParis_2021,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNFCC_NDCsParis_2021 <- dfm(tokens(corpus_UNFCC_NDCsParis_2021))

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNFCC_NDCsParis_2021),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_UNFCC_NDCsParis_2021.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_UNFCC_NDCsParis_2021.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNFCC_NDCsParis_2021,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```



```{r}
# 50 most frequent features in the UNFCC_NDCsParis_2021 health-match dfm.
features <- dfm_UNFCC_NDCsParis_2021 %>% textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNFCC_NDCsParis_2021.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNFCC_NDCsParis_2021"
write_csv(features, "features_twenty6.csv")

```    



# UNSG_ClimateActionSupport_2019


```{r}

# Tokens of the UNSG_ClimateActionSupport_2019 report only.
tokens_UNSG_ClimateActionSupport_2019 <- tokens_subset(
  tokens_ngram, docname_ == "UNSG_ClimateActionSupport_2019"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNSG_ClimateActionSupport_2019 <- kwic(
  tokens_UNSG_ClimateActionSupport_2019, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_UNSG_ClimateActionSupport_2019,
  "kwic_UNSG_ClimateActionSupport_2019_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNSG_ClimateActionSupport_2019 <- corpus(
  kwic_UNSG_ClimateActionSupport_2019,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNSG_ClimateActionSupport_2019 <- dfm(
  tokens(corpus_UNSG_ClimateActionSupport_2019)
)

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNSG_ClimateActionSupport_2019),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_UNSG_ClimateActionSupport_2019.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_UNSG_ClimateActionSupport_2019.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNSG_ClimateActionSupport_2019,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```




```{r}
# 50 most frequent features in the UNSG_ClimateActionSupport_2019
# health-match dfm.
features <- dfm_UNSG_ClimateActionSupport_2019 %>%
  textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNSG_ClimateActionSupport_2019.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNSG_ClimateActionSupport_2019"
write_csv(features, "features_twenty7.csv")

```    



# UNSG_OurCommAgenda_2021


```{r}

# Tokens of the UNSG_OurCommAgenda_2021 report only.
tokens_UNSG_OurCommAgenda_2021 <- tokens_subset(
  tokens_ngram, docname_ == "UNSG_OurCommAgenda_2021"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_UNSG_OurCommAgenda_2021 <- kwic(
  tokens_UNSG_OurCommAgenda_2021, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_UNSG_OurCommAgenda_2021,
  "kwic_UNSG_OurCommAgenda_2021_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_UNSG_OurCommAgenda_2021 <- corpus(
  kwic_UNSG_OurCommAgenda_2021,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_UNSG_OurCommAgenda_2021 <- dfm(tokens(corpus_UNSG_OurCommAgenda_2021))

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_UNSG_OurCommAgenda_2021),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- names(topfeatures(fcm, 40))

set.seed(1234)
fcm_select(fcm, pattern = feat, verbose = FALSE) %>%
  textplot_network(min_freq = 0.5)
ggsave("network_UNSG_OurCommAgenda_2021.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device. The file name
# follows the dfm_<docname>.pdf pattern used for the other outputs.
pdf("dfm_UNSG_OurCommAgenda_2021.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_UNSG_OurCommAgenda_2021,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```




```{r}
# 50 most frequent features in the UNSG_OurCommAgenda_2021 health-match dfm.
features <- dfm_UNSG_OurCommAgenda_2021 %>% textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_UNSG_OurCommAgenda_2021.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "UNSG_OurCommAgenda_2021"
write_csv(features, "features_twenty8.csv")

```    




# WMO_GlobalClimate_2019


```{r}

# Tokens of the WMO_GlobalClimate_2019 report only.
tokens_WMO_GlobalClimate_2019 <- tokens_subset(
  tokens_ngram, docname_ == "WMO_GlobalClimate_2019"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_WMO_GlobalClimate_2019 <- kwic(
  tokens_WMO_GlobalClimate_2019, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_WMO_GlobalClimate_2019,
  "kwic_WMO_GlobalClimate_2019_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_WMO_GlobalClimate_2019 <- corpus(
  kwic_WMO_GlobalClimate_2019,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_WMO_GlobalClimate_2019 <- dfm(tokens(corpus_WMO_GlobalClimate_2019))

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_WMO_GlobalClimate_2019),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_WMO_GlobalClimate_2019.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_WMO_GlobalClimate_2019.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_WMO_GlobalClimate_2019,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```




```{r}
# 50 most frequent features in the WMO_GlobalClimate_2019 health-match dfm.
features <- dfm_WMO_GlobalClimate_2019 %>% textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_WMO_GlobalClimate_2019.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "WMO_GlobalClimate_2019"
write_csv(features, "features_twenty9.csv")

```    




# WMO_StateClimateCrisis_2020


```{r}

# Tokens of the WMO_StateClimateCrisis_2020 report only.
tokens_WMO_StateClimateCrisis_2020 <- tokens_subset(
  tokens_ngram, docname_ == "WMO_StateClimateCrisis_2020"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_WMO_StateClimateCrisis_2020 <- kwic(
  tokens_WMO_StateClimateCrisis_2020, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_WMO_StateClimateCrisis_2020,
  "kwic_WMO_StateClimateCrisis_2020_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_WMO_StateClimateCrisis_2020 <- corpus(
  kwic_WMO_StateClimateCrisis_2020,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_WMO_StateClimateCrisis_2020 <- dfm(
  tokens(corpus_WMO_StateClimateCrisis_2020)
)

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_WMO_StateClimateCrisis_2020),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_WMO_StateClimateCrisis_2020.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_WMO_StateClimateCrisis_2020.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_WMO_StateClimateCrisis_2020,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```




```{r}
# 50 most frequent features in the WMO_StateClimateCrisis_2020
# health-match dfm.
features <- dfm_WMO_StateClimateCrisis_2020 %>%
  textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_WMO_StateClimateCrisis_2020.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "WMO_StateClimateCrisis_2020"
write_csv(features, "features_thirty.csv")

```    


# WMO_StateGlobalClimate_2020


```{r}

# Tokens of the WMO_StateGlobalClimate_2020 report only.
tokens_WMO_StateGlobalClimate_2020 <- tokens_subset(
  tokens_ngram, docname_ == "WMO_StateGlobalClimate_2020"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_WMO_StateGlobalClimate_2020 <- kwic(
  tokens_WMO_StateGlobalClimate_2020, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_WMO_StateGlobalClimate_2020,
  "kwic_WMO_StateGlobalClimate_2020_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_WMO_StateGlobalClimate_2020 <- corpus(
  kwic_WMO_StateGlobalClimate_2020,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_WMO_StateGlobalClimate_2020 <- dfm(
  tokens(corpus_WMO_StateGlobalClimate_2020)
)

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_WMO_StateGlobalClimate_2020),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_WMO_StateGlobalClimate_2020.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_WMO_StateGlobalClimate_2020.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_WMO_StateGlobalClimate_2020,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```





```{r}
# 50 most frequent features in the WMO_StateGlobalClimate_2020
# health-match dfm.
features <- dfm_WMO_StateGlobalClimate_2020 %>%
  textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_WMO_StateGlobalClimate_2020.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "WMO_StateGlobalClimate_2020"
write_csv(features, "features_thirty1.csv")

```   


# WMO_UnitedInScience_2021


```{r}

# Tokens of the WMO_UnitedInScience_2021 report only.
tokens_WMO_UnitedInScience_2021 <- tokens_subset(
  tokens_ngram, docname_ == "WMO_UnitedInScience_2021"
)

# Health-dictionary keyword-in-context matches (25 tokens either side),
# exported to CSV.
kwic_WMO_UnitedInScience_2021 <- kwic(
  tokens_WMO_UnitedInScience_2021, health_dict,
  window = 25, valuetype = "fixed"
)
readr::write_csv(
  kwic_WMO_UnitedInScience_2021,
  "kwic_WMO_UnitedInScience_2021_health.csv"
)

# Corpus rebuilt from the matches, and its document-feature matrix.
corpus_WMO_UnitedInScience_2021 <- corpus(
  kwic_WMO_UnitedInScience_2021,
  split_context = FALSE, extract_keyword = TRUE
)
dfm_WMO_UnitedInScience_2021 <- dfm(tokens(corpus_WMO_UnitedInScience_2021))

# Network graph of the 40 top features, from co-occurrence counts in a
# 5-token window.
fcm <- fcm(
  tokens(corpus_WMO_UnitedInScience_2021),
  context = "window", count = "frequency", window = 5L,
  span_sentence = FALSE, tri = FALSE
)
feat <- fcm %>% topfeatures(40) %>% names()

set.seed(1234)
textplot_network(
  fcm_select(fcm, pattern = feat, verbose = FALSE),
  min_freq = 0.5
)
ggsave("network_WMO_UnitedInScience_2021.pdf")

# Word cloud of the same matches on a 7 x 7 inch PDF device.
pdf("dfm_WMO_UnitedInScience_2021.pdf", width = 7, height = 7)
textplot_wordcloud(
  dfm_WMO_UnitedInScience_2021,
  random_order = FALSE, min_size = 0.5, max_size = 5,
  min_count = 3, max_words = 50, color = "red"
)
dev.off()

```





```{r}
# 50 most frequent features in the WMO_UnitedInScience_2021 health-match dfm.
features <- dfm_WMO_UnitedInScience_2021 %>% textstat_frequency(n = 50)

# Order the feature levels by descending frequency so the x-axis is sorted.
features$feature <- reorder(features$feature, -features$frequency)

features %>%
  ggplot(aes(x = feature, y = frequency)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("frequency_WMO_UnitedInScience_2021.pdf")

# Tag the rows with their source document before exporting.
features$docname <- "WMO_UnitedInScience_2021"
write_csv(features, "features_thirty2.csv")

```   




```{r}

# Number of health-dictionary keyword-in-context matches per report. Each
# object holds one row per docname with the row count `n` of that report's
# kwic table.
one <- summarise(group_by(kwic_CBD_ConventionBiodiversitycop2014_2019, docname), n = n())
two <- summarise(group_by(kwic_CBD_GlobalBiodiversityOutlook5_2020, docname), n = n())
three <- summarise(group_by(kwic_FAO_AgriFoodSys_2021, docname), n = n())
four <- summarise(group_by(kwic_GCF_GovClimateFuture_2021, docname), n = n())
five <- summarise(group_by(kwic_IEA_WorldEnergyOutlook_2021, docname), n = n())
six <- summarise(group_by(kwic_IPBES_BiodiversityCCWorkshop_2021, docname), n = n())
seven <- summarise(group_by(kwic_IPBES_GRBiodiversityEcosys_2019, docname), n = n())
eight <- summarise(group_by(kwic_IPCC_AR6WGI_2021, docname), n = n())
nine <- summarise(group_by(kwic_IPCC_SR15_2018, docname), n = n())
ten <- summarise(group_by(kwic_IPCC_SRCCL_2019, docname), n = n())

eleven <- summarise(group_by(kwic_IPCC_SROCC_2019, docname), n = n())
twelve <- summarise(group_by(kwic_MIT_FWGlobalOutlook_2018, docname), n = n())
thirteen <- summarise(group_by(kwic_SEI_PGR2021_2021, docname), n = n())
fourteen <- summarise(group_by(kwic_UN_SRDroughts_2021, docname), n = n())
fifteen <- summarise(group_by(kwic_UN_UNFSS2021_2021, docname), n = n())
sixteen <- summarise(group_by(kwic_UNDP_NDC_2019, docname), n = n())
seventeen <- summarise(group_by(kwic_UNEP_AGR2020_2020, docname), n = n())
eighteen <- summarise(group_by(kwic_UNEP_AWBBB_2021, docname), n = n())
nineteen <- summarise(group_by(kwic_UNEP_EmissionsGap_2020, docname), n = n())
twenty <- summarise(group_by(kwic_UNEP_ERPNC_2021, docname), n = n())

twenty1 <- summarise(group_by(kwic_UNEP_MPN_2021, docname), n = n())
twenty2 <- summarise(group_by(kwic_UNEP_ProtectedPlanetReport_2018, docname), n = n())
twenty3 <- summarise(group_by(kwic_UNEP_WF20EN_2020, docname), n = n())
twenty4 <- summarise(group_by(kwic_UNFCC_ClimateActionSupport_2019, docname), n = n())
twenty5 <- summarise(group_by(kwic_UNFCC_LandWaterFoodCOP26_2021, docname), n = n())
twenty6 <- summarise(group_by(kwic_UNFCC_NDCsParis_2021, docname), n = n())
twenty7 <- summarise(group_by(kwic_UNSG_ClimateActionSupport_2019, docname), n = n())
twenty8 <- summarise(group_by(kwic_UNSG_OurCommAgenda_2021, docname), n = n())
twenty9 <- summarise(group_by(kwic_WMO_GlobalClimate_2019, docname), n = n())

thirty <- summarise(group_by(kwic_WMO_StateClimateCrisis_2020, docname), n = n())
thirty1 <- summarise(group_by(kwic_WMO_StateGlobalClimate_2020, docname), n = n())
thirty2 <- summarise(group_by(kwic_WMO_UnitedInScience_2021, docname), n = n())


```



```{r}
# Per-document corpus statistics saved earlier as corpus_summary.csv; its
# `Text` column is the document name from quanteda's corpus summary.
summary <- read_csv("corpus_summary.csv")

# Stack the per-report mention counts and attach the corpus statistics.
# Rows are matched on the document name rather than by position, so the
# result stays correct when a report has no matches (it contributes no count
# row) or when the corpus file order differs from the order listed here.
# Reports found only in the corpus summary are kept with zero mentions.
mentions <- bind_rows(
  one, two, three, four, five, six, seven, eight, nine, ten,
  eleven, twelve, thirteen, fourteen, fifteen, sixteen, seventeen,
  eighteen, nineteen, twenty,
  twenty1, twenty2, twenty3, twenty4, twenty5, twenty6, twenty7, twenty8,
  twenty9, thirty, thirty1, thirty2
) %>%
  full_join(summary, by = c("docname" = "Text")) %>%
  mutate(n = replace_na(n, 0L)) %>%
  select(-Types) %>%
  rename(mentions = n, total_words = Tokens)

write_csv(mentions, "mentions.csv")
```




```{r}
# Reload the per-report top-feature tables exported by the chunks above,
# one data frame per CSV file.
features_one <- read_csv(file = "features_one.csv")
features_two <- read_csv(file = "features_two.csv")
features_three <- read_csv(file = "features_three.csv")
features_four <- read_csv(file = "features_four.csv")
features_five <- read_csv(file = "features_five.csv")
features_six <- read_csv(file = "features_six.csv")
features_seven <- read_csv(file = "features_seven.csv")
features_eight <- read_csv(file = "features_eight.csv")
features_nine <- read_csv(file = "features_nine.csv")
features_ten <- read_csv(file = "features_ten.csv")

features_eleven <- read_csv(file = "features_eleven.csv")
features_twelve <- read_csv(file = "features_twelve.csv")
features_thirteen <- read_csv(file = "features_thirteen.csv")
features_fourteen <- read_csv(file = "features_fourteen.csv")
features_fifteen <- read_csv(file = "features_fifteen.csv")
features_sixteen <- read_csv(file = "features_sixteen.csv")
features_seventeen <- read_csv(file = "features_seventeen.csv")
features_eighteen <- read_csv(file = "features_eighteen.csv")
features_nineteen <- read_csv(file = "features_nineteen.csv")
features_twenty <- read_csv(file = "features_twenty.csv")

features_twenty1 <- read_csv(file = "features_twenty1.csv")
features_twenty2 <- read_csv(file = "features_twenty2.csv")
features_twenty3 <- read_csv(file = "features_twenty3.csv")
features_twenty4 <- read_csv(file = "features_twenty4.csv")
features_twenty5 <- read_csv(file = "features_twenty5.csv")
features_twenty6 <- read_csv(file = "features_twenty6.csv")
features_twenty7 <- read_csv(file = "features_twenty7.csv")
features_twenty8 <- read_csv(file = "features_twenty8.csv")
features_twenty9 <- read_csv(file = "features_twenty9.csv")

features_thirty <- read_csv(file = "features_thirty.csv")
features_thirty1 <- read_csv(file = "features_thirty1.csv")
features_thirty2 <- read_csv(file = "features_thirty2.csv")

```


```{r}
# Pool the per-report top-feature tables, total each feature's frequency
# across reports, and keep the 50 features with the largest totals.
freq <- bind_rows(
  features_one, features_two, features_three, features_four, features_five,
  features_six, features_seven, features_eight, features_nine, features_ten,
  features_eleven, features_twelve, features_thirteen, features_fourteen,
  features_fifteen, features_sixteen, features_seventeen, features_eighteen,
  features_nineteen, features_twenty, features_twenty1, features_twenty2,
  features_twenty3, features_twenty4, features_twenty5, features_twenty6,
  features_twenty7, features_twenty8, features_twenty9, features_thirty,
  features_thirty1, features_thirty2
) %>%
  group_by(feature) %>%
  summarise(freq = sum(frequency)) %>%
  arrange(desc(freq)) %>%
  head(50)

write_csv(freq, "freq.csv")

# Order the feature levels by descending total so the x-axis is sorted.
freq$feature <- reorder(freq$feature, -freq$freq)

freq %>%
  ggplot(aes(x = feature, y = freq)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("total_frequency.pdf")

```










